The purpose of this workflow is to explore the effect of two human Klotho variants on gene expression in mice. It follows directly from 1.Klotho_Initial_Data_Visualization.Rmd and depends on output from that workflow.

rm(list = ls())

library(here)

fdr.thresh = 0.1 #fdr for differentially expressed genes

args <- commandArgs(trailingOnly=T)
subgroup <- args[2]

if(is.na(subgroup)){
  #subgroup <- "all ages"
  subgroup <- list("age_batch" = 12)
  #subgroup <- list("age_batch" = 4)
  #subgroup <- list("age_batch" = 4, "sex_climb" = "Female") #example with more than one filter
}

if(subgroup == "all ages"){
  results.dir <- here("Results", "all")
}else{
  subgroup.results <- paste(sapply(1:length(subgroup), 
    function(x) paste(names(subgroup)[x], subgroup[[x]], sep = "_")), 
    collapse = "_")
  results.dir <- here("Results", subgroup.results)
}

general.data.dir <- here("Data", "general")

The subgroup analyzed in this workflow is: age_batch = 12

all.fun <- list.files(here("Code"), full.names = TRUE)
for(i in 1:length(all.fun)){source(all.fun[i])}

processed.data.dir <- file.path(results.dir, "processed_data")
general.processed.data.dir <- here("Results", "Processed_Data")
mouse.data.dir <- here("Data", "Mouse")

geno.cols <- get_geno_cols(here("Data", "general", "geno_cols.csv"))
needed.libraries <- c("pheatmap", "vioplot", "RColorBrewer",
  "gprofiler2", "cluster", "pathview", "clusterProfiler", "stringr", "igraph", "wordcloud",
  "wordcloud2")
load_libraries(needed.libraries)
mouse.info <- read.csv(file.path(mouse.data.dir, "metadata_validated.csv"))
orthos <- read.delim(file.path(general.data.dir, "human.mouse.orthologs.txt"))
mouse.genes <- read.delim(here("Data", "general", "mouse_gene_info.txt"))
covar.table <- read.table(file.path(processed.data.dir, "mouse_info.csv"), sep = ",", header = TRUE, row.names = 1)
raw.expr <- read.table(here("Data", "Mouse", "rsem.merged.gene_counts.tsv"), header = TRUE)
norm.expr <- readRDS(file.path(processed.data.dir, "Normalized_Expression.RDS"))
scaled.expr <- readRDS(file.path(processed.data.dir, "Scaled_Expression.RDS"))

#gene lists for KEGG, GO, Biodomains, and the intersections
bd.list <- readRDS(file.path(general.processed.data.dir, "Mouse_Biodomains_for_GSEA.RDS"))
sub.bd.list <- readRDS(file.path(general.processed.data.dir, "Mouse_Subdomains_for_GSEA.RDS"))
bd.go.list <- readRDS(file.path(general.processed.data.dir, "Mouse_Biodomains_sub_GO_for_GSEA.RDS"))
kegg.list <- readRDS(file.path(general.processed.data.dir, "Mouse_KEGG_for_GSEA.RDS"))
kegg.bd.list <- readRDS(file.path(general.processed.data.dir, "Mouse_KEGG_Intersections_for_GSEA.RDS"))

We adjusted the expression by non-age covariates.

dummy.covar <- dummy_covar(covar.table[,c("sequencingBatch", "sex_ge")])
adj_expr <- adjust(t(scaled.expr), dummy.covar)

We made a set of matrices for which rows were KEGG-Biodomain intersections and columns were individual mice. The entry in each cell is the mean expression value of those genes for that mouse. (Initially, we made one matrix for each KEGG pathway. In each, the rows specify the biodomains.)

#This function returns mean gene expression values for a vector of genes IDs
#given the expression mat. The expression matrix should have genes in columns
#and individuals in rows
gene_mean_mat <- function(geneID, expr.mat){
    common.genes <- intersect(geneID, colnames(expr.mat))
    if(length(common.genes) > 0){
        gene.means <- rowMeans(expr.mat[,common.genes,drop=FALSE])
    }else{
        gene.means <- NULL
    }
    return(gene.means)
}

#This function returns individual means for a non-nested
#list of genes, like in bd.list
non_nested_mean_vals <- function(non_nested_gene_list, expr.mat){
    mean.mat <- matrix(NA, nrow = length(non_nested_gene_list), ncol = nrow(expr.mat))
    rownames(mean.mat) <- names(non_nested_gene_list)    
    colnames(mean.mat) <- rownames(expr.mat)
    for(i in 1:length(non_nested_gene_list)){
        gene.means <- gene_mean_mat(non_nested_gene_list[[i]], expr.mat)
        if(length(gene.means) > 0){
            mean.mat[i,] <- gene.means
        }
    }
    return(mean.mat)
}

#This function creates matrices of mean gene values for individual
#mice for a nested gene list, like kegg.bd.list
#organization lets you toggle between having the rows of each
#matrix be the outer level or the inner level
nested_mean_vals <- function(nested_gene_list, expr.mat, organization = c("outer", "inner")){

    organization <- organization[1]

    #the outer level makes up the rows of each matrix (i.e. biodomains)
    if(organization == "outer"){
        u_inner <- unique(unlist(lapply(nested_gene_list, names)))
        mean.gene.list <- vector(mode = "list", length = length(u_inner))
        names(mean.gene.list) <- u_inner
        #get the gene list for each inner
        for(i in 1:length(u_inner)){
            i.gene.idx <- lapply(nested_gene_list, function(x) which(names(x) == u_inner[i]))
            i.gene.list <- lapply(1:length(i.gene.idx), 
                function(x) if(length(i.gene.idx[[x]]) > 0){nested_gene_list[[x]][[i.gene.idx[[x]]]]})
            names(i.gene.list) <- names(i.gene.idx)
            i.gene.vals <- non_nested_mean_vals(non_nested_gene_list = i.gene.list, expr.mat)
            #pheatmap(i.gene.vals)
            mean.gene.list[[i]] <- i.gene.vals
        }
    }
    
    if(organization == "inner"){
        u_outer <- names(nested_gene_list)
        mean.gene.list <- vector(mode = "list", length = length(u_outer))
        for(o in 1:length(u_outer)){
            if(length(nested_gene_list[[o]]) > 0){
                o.gene.vals <- non_nested_mean_vals(non_nested_gene_list = nested_gene_list[[o]], expr.mat)
                mean.gene.list[[o]] <- o.gene.vals
            }
        }
    }

    return(mean.gene.list)
}

plot_mean_bd_k <- function(mean.bd.k.mat, covar.mat, plot.label = "",
    row.names = rownames(mean.bd.k.mat), label.margin = 12, 
    order.by = c("PC", "top.rank"), plot.results = TRUE,
    global.min = NULL, global.max = NULL){

    order.by <- order.by[1]
    sub.decomp <- NULL
    pc.r2 <- NULL
    min.val <- global.min
    max.val <- global.max

    if(is.null(min.val)){
        min.val <- min(mean.bd.k.mat, na.rm = TRUE)
    }
    if(is.null(max.val)){
        max.val <- max(mean.bd.k.mat, na.rm = TRUE)
    }
    
    neg.vals <- length(which(mean.bd.k.mat < 0))
    pos.vals <- length(which(mean.bd.k.mat > 0))
    
    if(neg.vals > 0 && pos.vals > 0){
      col.scale = c("blue", "brown")
      grad.dir = "ends"
      split.at.vals = TRUE
    }
    if(neg.vals > 0 && pos.vals == 0){
      col.scale = "blue"
      grad.dir = "low"
      split.at.vals = TRUE
    }
    if(neg.vals == 0 && pos.vals > 0){
      col.scale = "brown"
      grad.dir = "high"
      split.at.vals = TRUE
    }

    has.vals <- which(!is.na(rowMeans(mean.bd.k.mat)))
    if(length(has.vals) > 0){
        sub.mat <- mean.bd.k.mat[has.vals,,drop=FALSE]

        geno.names <- lapply(names(geno.cols), function(x) rownames(covar.mat)[which(covar.mat[,"climb_geno"] == x)])
        cols <- lapply(1:length(geno.names), function(x) rep(geno.cols[x], length(geno.names[[x]])))
        col.table <- cbind(unlist(geno.names), unlist(cols))
        col.mat <- sapply(colnames(mean.bd.k.mat), function(x) col.table[match(x, col.table[,1]),2])
                
        if(length(has.vals) > 1){
            sub.decomp <- plot.decomp(t(sub.mat), cols = col.mat, main = "Decomposition", 
                cex = 2, plot.result = FALSE)
            pc.model <- lm(sub.decomp$u[,1]~as.factor(col.table[,2]))
            #pc.vals <- lapply(levels(as.factor(col.table[,2])), function(x) sub.decomp$u[which(col.table[,2] == x),1])
            #val.order <- order(sapply(pc.vals, mean))
            #boxplot(pc.vals[val.order]);abline(h = 0)
            #plot.with.model(sub.decomp$u[,1], mean.bd.k.mat["Structural Stabilization",], col = col.table[,2])
            pc.r2 <- summary(pc.model)$adj.r.squared
        }else{
            order.by = "top.rank"
        }
        

        test <- apply(mean.bd.k.mat, 1, function(x) if(!all(is.na(x))){lm(x~as.factor(covar.mat[,"climb_geno"]))}else{NA})
        ind.r2 <- sapply(test, function(x) if(length(x) > 1){summary(x)$adj.r.squared}else{NA})
        names(ind.r2) <- rownames(mean.bd.k.mat)
        r2.order <- order(ind.r2)

        if(order.by == "top.rank"){
            col.order <- order(mean.bd.k.mat[which.max(ind.r2),])
        }else{
            col.order <- order(colMeans(mean.bd.k.mat, na.rm = TRUE))
        }

        if(plot.results){

            #layout.mat <- matrix(c(1,1,1,1,2,1,1,1,1,2,5,5,5,5,0,0,0,0,4,4,3,3,3,4,4,3,3,3,4,4), ncol = 6, byrow = FALSE)
            layout.mat <- matrix(c(1,1,1,1,2,1,1,1,1,2,5,5,5,5,0,6,6,6,7,7,6,6,6,4,4,3,3,3,4,4,3,3,3,4,4), ncol = 7, byrow = FALSE)
            layout(layout.mat, widths = c(1, 1, 0.5, 0.5, 0.5, 0.5))
            
            #pane 1 is the heat map for individual values
            par(mar = c(0,label.margin,2,0))
            imageWithText(mean.bd.k.mat[rev(r2.order),col.order,drop=FALSE], show.text = FALSE, 
                col.names = NULL, row.text.shift = 0.015, col.scale = col.scale, grad.dir = grad.dir,
                split.at.vals = split.at.vals, color.fun = "linear",
                light.dark = "f", global.color.scale = TRUE, global.min = min.val, 
                global.max = max.val, row.names = rev(row.names[r2.order]))

            mtext(plot.label, side = 3, line = 0)
            plot.dim <- par("usr")    
            color.coord <- segment_region(plot.dim[1], plot.dim[2], length(col.mat))
            coord.dist <- apply(consec_pairs(color.coord), 1, function(x) x[2] - x[1])/2
        
            #pane 2 is color bars for individual gentotypes
            par(mar = c(2,label.margin,0,0))
            plot.new()
            plot.window(xlim = c(plot.dim[1], plot.dim[2]), ylim = c(0,1))
            for(i in 1:length(col.mat)){
                draw.rectangle(color.coord[i]-coord.dist[1], color.coord[i]+coord.dist[1],
                    0, 1, fill = col.mat[col.order[i]], border = NA)
            }

            #pane 3 is the decomposition
            par(mar = c(4,4,4,2))
            if(length(has.vals) > 1){
                sub.decomp <- plot.decomp(t(sub.mat), cols = col.mat, main = "Decomposition", cex = 2)
                pc.order <- order(sub.decomp$u[,1])
                
                #pane 4 is the loadings on the matrix rows colored by genotype
                par(mar = c(2,2,0,2))
                barplot(sub.decomp$u[pc.order,1], col = col.mat[pc.order])
                mtext("PC1 by genotype", side = 3, line = -1.5)
                text(nrow(sub.decomp$u)*0.6, -0.1, paste("R^2 =", signif(pc.r2, 2)))
                #plot.decomp(sub.mat, label.points = TRUE)
            }else{
                plot.text("Not enough rows to decompose")
                plot.text("Not enough rows to decompose")
            }
        

            #pane 5 is the bar plot of r2 for each row
            #add the adjusted R2 from the model
            par(mar = c(0,0,2,2))
            barplot(ind.r2[r2.order], horiz = TRUE, las = 2, xlim = c(0, 0.4), names = NA)
            plot.dim <- par("usr")
            segments(x0 = 0, y0 = plot.dim[3], y1 = plot.dim[4])
            #add grid lines
            segments(x0 = seq(0, 0.4, 0.1), y0 = plot.dim[3], y1 = plot.dim[4], 
                lty = 2, col = "darkgray")
            
            #pane 6 is the violin plots
            #add violin plot of PC vals for highest row of the matrix
            #or the overall PC values depending on the column order

            par(mar = c(4,0,4,2))
            genos <- levels(as.factor(covar.table[,"climb_geno"]))
            if(order.by == "top.rank"){
                max.idx <- which.max(ind.r2)
                row.vals <- lapply(genos, 
                    function(x) mean.bd.k.mat[max.idx,which(covar.table[,"climb_geno"] == x)]) 
                plot.label <- names(max.idx)
            }else{
                row.vals <- lapply(genos, 
                    function(x) sub.decomp$u[which(covar.table[,"climb_geno"] == x),1])
                plot.label = "PC1"
            }
            mean.order <- order(sapply(row.vals, mean))
            vioplot(row.vals[mean.order], col = geno.cols[genos[mean.order]], 
                names = genos[mean.order], las = 2, main = plot.label)
            stripchart(row.vals[mean.order], add = TRUE, col = "#dd1c77", 
                vertical = TRUE, pch = 16, method = "jitter")
            plot.dim <- par("usr")
            segments(x0 = plot.dim[1], x1 = plot.dim[2], y0 = 0)

            #pane 7 is a legend for the genotype colors
            plot.new()
            plot.window(xlim = c(0,1), ylim = c(0,1))
            legend(0, 0.9, fill = geno.cols, legend = names(geno.cols))
        }

    }#end case for no values
    result <- list("decomp" = sub.decomp, "row.r2" = ind.r2, "overall.r2" = pc.r2)
    invisible(result)
}

merge_mean_mats <- function(mat_list){
    sub.mats <- Reduce("rbind", mat_list)
    inner.names <- lapply(mat_list, rownames)
    outer.names <- lapply(1:length(inner.names), function(x) rep(names(inner.names)[x], length(inner.names[[x]])))
    name.mat <- cbind(unlist(outer.names), unlist(inner.names))
    mat.labels <- apply(name.mat, 1, function(x) paste(x, collapse = " : "))        
    rownames(sub.mats) <- mat.labels
    sub.vals <- sub.mats[which(!is.na(rowMeans(sub.mats))),]
    return(sub.vals)
}

gene_vioplot <- function(gene.name){
    gene.id <- mouse.genes[which(mouse.genes[,"external_gene_name"] == gene.name),"ensembl_gene_id"]
    geno.expr <- lapply(geno.idx, function(x) adj_expr[x,gene.id])
    model <- lm(adj_expr[,gene.id]~as.factor(covar.table[,"climb_geno"]))
    f <- summary(model)$fstatistic
    p <- pf(f[1],f[2],f[3],lower.tail=F)
    geno.order <- order(sapply(geno.expr, mean))
    vioplot(geno.expr[geno.order], col = geno.cols[geno.order], 
        main = paste(gene.name, "\np =", signif(p, 2)))
    stripchart(geno.expr[geno.order], method = "jitter", vertical = TRUE, 
        pch = 16, col = "#dd1c77", add = TRUE)
    abline(h = 0)
}

#if inner.term is NULL, a matrix is assumed.
#if it is specified, a list is assumed
#mean.term.vals is a matrix of mean values like mean.k.vals
#or a list, like mean.bd.k.vals, in which case, inner.term
#needs to be specified.
term_vioplot <- function(outer.term, inner.term = NULL, mean.term.vals){
    if(is.null(inner.term)){
        term.idx <- which(rownames(mean.term.vals) == outer.term)
        if(length(term.idx) == 0){stop(paste("I can't find", outer.term))}
        term.expr <- lapply(geno.idx, function(x) mean.term.vals[term.idx,x])
        plot.label <- outer.term    
    }else{
        #otherwise we have a list
        outer.idx <- which(names(mean.term.vals) == outer.term)
        if(length(outer.idx) == 0){stop(paste("I can't find", outer.term))}
        inner.idx <- which(rownames(mean.term.vals[[outer.idx]]) == inner.term)
        if(length(inner.idx) == 0){stop(paste("I can't find", inner.term))}
        term.expr <- lapply(geno.idx, function(x) mean.term.vals[[outer.idx]][inner.idx,x])
        plot.label <- paste(outer.term, inner.term, sep = " : ")
    }

    model <- lm(unlist(term.expr)~as.factor(covar.table[unlist(geno.idx),"climb_geno"]))
    f <- summary(model)$fstatistic
    p <- pf(f[1],f[2],f[3],lower.tail=F)
    geno.order <- order(sapply(term.expr, mean))
    vioplot(term.expr[geno.order], col = geno.cols[geno.order], 
        main = paste(plot.label, "\np =", signif(p, 2)))
    stripchart(term.expr[geno.order], method = "jitter", vertical = TRUE, 
        pch = 16, col = "#dd1c77", add = TRUE)
    abline(h = 0)

    invisible(term.expr)

}

Biodomains

We first looked at biodomains and KEGG pathways separately The plots below show the results for Biodomains.

mean.bd.vals <- non_nested_mean_vals(bd.list, expr.mat = adj_expr)

bd.result <- plot_mean_bd_k(mean.bd.k.mat = mean.bd.vals, 
    covar.mat = covar.table, plot.results = FALSE)

par(mar = c(4,20,2,2))
barplot(sort(bd.result$row.r2), horiz = TRUE, las = 2, xlab = "Adjusted R2",
    main = "Biodomains")

Biodomain plots

The plots below show how well each biodomain separates the genotypes

geno.idx <- lapply(names(geno.cols), function(x) which(covar.table[,"climb_geno"] == x))
names(geno.idx) <- names(geno.cols)

#pdf("~/Desktop/biodomains.pdf", width = 7, height = 5)
for(bd in 1:length(bd.list)){
    cat("####", names(bd.list)[bd], "\n")
    geno.vals <- lapply(geno.idx, function(x) mean.bd.vals[bd,x])
    geno.order <- order(sapply(geno.vals, mean))
    model <- lm(mean.bd.vals[bd,]~as.factor(covar.table[,"climb_geno"]))
    f <- summary(model)$fstatistic
    p <- pf(f[1],f[2],f[3],lower.tail=F)
    vioplot(geno.vals[geno.order], col = geno.cols[geno.order], 
        main = paste(names(bd.list)[bd], "\np =", signif(p, 2)))
    stripchart(geno.vals[geno.order], vertical = TRUE, pch = 16, method = "jitter",
        add = TRUE, col = "#dd1c77")
    abline(h = 0)
    cat("\n\n")
}

Apoptosis

APP Metabolism

Autophagy

Cell Cycle

DNA Repair

Endolysosome

Epigenetic

Immune Response

Lipid Metabolism

Metal Binding and Homeostasis

Mitochondrial Metabolism

Myelination

Oxidative Stress

Proteostasis

RNA Spliceosome

Structural Stabilization

Synapse

Tau Homeostasis

Vasculature

#dev.off()

Subdomains

We also looked at the subdomains. The boxplot below shows the distribution of adjusted R2 for the subdomains within each domain.

mean.sbd.vals <- nested_mean_vals(nested_gene_list = sub.bd.list, expr.mat = adj_expr, 
    organization = "inner")

sbd.result <- lapply(mean.sbd.vals, function(x) if(length(x) > 0){plot_mean_bd_k(x, 
    covar.mat = covar.table, plot.results = FALSE)}else{NA})

sbd.r2 <- lapply(sbd.result, function(x) if(length(x) > 1){x$row.r2}else{NA})
names(sbd.r2) <- names(sub.bd.list)
#r2.order <- order(sapply(sbd.r2, function(x) if(length(x) > 0){mean(x, na.rm = TRUE)})) #by biodomain mean
r2.order <- order(bd.result$row.r2) #by overall biodomain r2
par(mar = c(4,16,2,2))
boxplot(sbd.r2[r2.order], las = 2, horizontal = TRUE)
abline(v = seq(0, 0.3, 0.1), lty = 2, col = "darkgray")
points(x = bd.result$row.r2[r2.order], y = 1:length(r2.order), col = "blue",
    pch = 16)

Subdomain bar plots

The bar plots below shows the R^2 values within each biodomain.

#pdf("~/Desktop/subdomain_r2.pdf", width = 10, height = 7)
max.r2 <- max(unlist(sbd.r2), na.rm = TRUE)
for(bd in 1:length(sbd.r2)){
    if(length(sbd.r2[[bd]]) > 1){
        cat("####", names(sbd.r2)[bd], "\n")
        par(mar = c(4,30,2,2))
        r2.vals <- sbd.r2[[bd]]
        r2.vals[which(r2.vals < 0)] <- 0
        barplot(sort(r2.vals), horiz = TRUE, las = 2, xlab = "Adjusted R2",
            main = names(sbd.r2)[bd], xlim = c(0, max.r2))
        abline(v = seq(0, max.r2, 0.1), lty = 2, col = "darkgray")
        cat("\n\n")
    }
}

Apoptosis

APP Metabolism

Autophagy

Cell Cycle

DNA Repair

Endolysosome

Epigenetic

Immune Response

Lipid Metabolism

Metal Binding and Homeostasis

Mitochondrial Metabolism

Myelination

Oxidative Stress

Proteostasis

Structural Stabilization

Synapse

Tau Homeostasis

Vasculature

#dev.off()

Overall

The plot below shows the top subdomains overall

all.sub.r2 <- unlist(sbd.r2)
#hist(all.sub.r2, breaks = 25)
big.r2 <- which(all.sub.r2 >= 0.15)

#pdf("~/Desktop/top_r2.pdf", width = 10, height = 8)
par(mar = c(4,35,2,2))
barplot(sort(all.sub.r2[big.r2]), horiz = TRUE, las = 2)
abline(v = seq(0, max(all.sub.r2, na.rm = TRUE), 0.1), lty = 2)

#dev.off()

Subdomain violin plots

The plots below show how well each subdomain separates the genotypes

#pdf("~/Desktop/subdomains.pdf", width = 7, height = 5)
for(bd in 1:length(sub.bd.list)){
    cat("####", names(sub.bd.list)[bd], "{.tabset .tabset-fade .tabset-pills}\n")
    if(length(sub.bd.list[[bd]]) > 0){  
        for(s in 1:length(sub.bd.list[[bd]])){
            cat("#####", names(sub.bd.list[[bd]])[s], "\n")  
            geno.vals <- lapply(geno.idx, function(x) mean.sbd.vals[[bd]][s,x])
            geno.order <- order(sapply(geno.vals, mean))
            model <- lm(unlist(geno.vals)~as.factor(covar.table[unlist(geno.idx),"climb_geno"]))
            f <- summary(model)$fstatistic
            p <- pf(f[1],f[2],f[3],lower.tail=F)
            vioplot(geno.vals[geno.order], col = geno.cols[geno.order], 
                main = paste(names(sub.bd.list)[bd], "\n", names(sub.bd.list[[bd]])[s], "\np =", signif(p, 2)))
            stripchart(geno.vals[geno.order], vertical = TRUE, pch = 16, method = "jitter",
                add = TRUE, col = "#dd1c77")
            abline(h = 0)
            cat("\n\n")
        }
    }
    cat("\n\n")
}

Apoptosis

intrinsic apoptotic signaling pathway

extrinsic apoptotic signaling pathway

regulation of oxidative stress-induced intrinsic apoptotic signaling pathway

neuron apoptotic process

apoptotic mitochondrial changes

canonical NF-kappaB signal transduction

inflammatory cell apoptotic process

regulation of cysteine-type endopeptidase activity involved in apoptotic process

programmed necrotic cell death

apoptotic process involved in development

glial cell apoptotic process

APP Metabolism

amyloid-beta clearance

amyloid precursor protein metabolic process

amyloid precursor protein biosynthetic process

amyloid-beta formation

Autophagy

macroautophagy

phagocytosis

mitophagy

Cell Cycle

cell cycle phase transition

DNA replication

microtubule cytoskeleton organization involved in mitosis

DNA Repair

global genome nucleotide-excision repair

DNA damage response

DNA repair complex

Endolysosome

clathrin-coated vesicle

receptor-mediated endocytosis

endocytic vesicle

early endosome

late endosome

lysosome

recycling endosome

endosomal transport

receptor internalization

exocytic vesicle

Epigenetic

histone modification

miRNA-mediated post-transcriptional gene silencing

DNA-binding transcription factor binding

Immune Response

activation of innate immune response

adaptive immune response

cytokine production

phagocytosis

neuroinflammatory response

leukocyte chemotaxis

behavioral defense response

Lipid Metabolism

lipid transport

lipid storage

fatty acid biosynthetic process

fatty acid metabolic process

lipid catabolic process

phosphatidylinositol metabolic process

phospholipid binding

glycerolipid metabolic process

cholesterol metabolic process

triglyceride metabolic process

glycolipid metabolic process

sphingolipid metabolic process

phosphatidylcholine metabolic process

phosphatidylserine metabolic process

Metal Binding and Homeostasis

cellular response to metal ion

regulation of metal ion transport

metal ion binding

oxidoreductase activity, acting on metal ions

metallopeptidase activity

Mitochondrial Metabolism

proton motive force-driven mitochondrial ATP synthesis

mitochondrial transport

mitochondrial membrane organization

apoptotic mitochondrial changes

electron transport chain

tricarboxylic acid cycle

mitochondrial translation

mitochondrion localization

mitochondrial calcium ion homeostasis

autophagy of mitochondrion

energy derivation by oxidation of organic compounds

Myelination

oligodendrocyte development

Schwann cell development

axon ensheathment

axon regeneration

Oxidative Stress

reactive oxygen species metabolic process

response to oxidative stress

intrinsic apoptotic signaling pathway in response to oxidative stress

Proteostasis

translation

proteasomal protein catabolic process

Golgi organization

endoplasmic reticulum

response to endoplasmic reticulum stress

vesicle-mediated transport to the plasma membrane

RNA Spliceosome

Structural Stabilization

actin filament organization

cell-substrate adhesion

microtubule polymerization or depolymerization

cell-cell junction organization

extracellular matrix organization

Synapse

trans-synaptic signaling

axon regeneration

axonal transport

dendritic transport

synaptic vesicle cycle

postsynapse organization

presynapse organization

learning or memory

Tau Homeostasis

tau-protein kinase activity

neurofibrillary tangle assembly

Vasculature

cardiac conduction

angiogenesis

endothelial cell migration

vasodilation

vascular associated smooth muscle cell development

maintenance of blood-brain barrier

vascular endothelial growth factor receptor signaling pathway

vascular associated smooth muscle cell apoptotic process

#dev.off()

Subdomain Heatmaps

For each subdomain, we calculated the mean expression across the genotypes. The plots below show these means grouped by biodomain.

#pdf("~/Desktop/sub_plots.pdf", width = 12, height = 5)
sd.global.min <- min(unlist(mean.sbd.vals), na.rm = TRUE)
sd.global.max <- max(unlist(mean.sbd.vals), na.rm = TRUE)
for(bd in 1:length(mean.sbd.vals)){
    if(length(mean.sbd.vals[[bd]]) > 0){
        cat("####", names(sub.bd.list)[bd], "\n")
        plot_mean_bd_k(mean.sbd.vals[[bd]], covar.mat = covar.table,
            label.margin = 24, global.min = sd.global.min, 
            global.max = sd.global.max)
        cat("\n\n")
    }
}

Apoptosis

APP Metabolism

Autophagy

Cell Cycle

DNA Repair

Endolysosome

Epigenetic

Immune Response

Lipid Metabolism

Metal Binding and Homeostasis

Mitochondrial Metabolism

Myelination

Oxidative Stress

Proteostasis

Structural Stabilization

Synapse

Tau Homeostasis

Vasculature

#dev.off()

KEGG pathways

The plots below show results for KEGG pathways. Because there are so many KEGG pathways, the bar plot only shows the top 20 results.

mean.k.vals <- non_nested_mean_vals(kegg.list, expr.mat = adj_expr)

k.result <- plot_mean_bd_k(mean.bd.k.mat = mean.k.vals, 
    covar.mat = covar.table, plot.results = FALSE)

par(mar = c(4,25,2,2))
barplot(tail(sort(k.result$row.r2), 25), horiz = TRUE, las = 2, xlab = "Adjusted R2",
    main = "KEGG pathways")

KEGG plots

The plots below show how well each biodomain separates the genotypes

k.order <- order(k.result$row.r2, decreasing = TRUE)
#pdf("~/Desktop/kegg.pdf", width = 7, height = 5)
kegg.mean.mat <- matrix(NA, nrow = 25, ncol = 5)
rownames(kegg.mean.mat) <- names(k.result$row.r2)[k.order[1:25]]
colnames(kegg.mean.mat) <- names(geno.cols)
for(k in 1:25){
    cat("####", names(k.result$row.r2)[k.order[k]], "\n")
    geno.vals <- lapply(geno.idx, function(x) mean.k.vals[k.order[k],x])
    geno.order <- order(sapply(geno.vals, mean))
    model <- lm(mean.k.vals[k.order[k],]~as.factor(covar.table[,"climb_geno"]))
    f <- summary(model)$fstatistic
    p <- pf(f[1],f[2],f[3],lower.tail=F)
    vioplot(geno.vals[geno.order], col = geno.cols[geno.order], 
        main = paste(names(kegg.list)[k.order[k]], "\np =", signif(p, 2)))
    stripchart(geno.vals[geno.order], vertical = TRUE, pch = 16, method = "jitter",
        add = TRUE, col = "#dd1c77")
    abline(h = 0)
    kegg.mean.mat[k,] <- sapply(geno.vals, mean)
    cat("\n\n")
}

Type II diabetes mellitus

Taste transduction

Proteasome

Drug metabolism - other enzymes

Coronavirus disease - COVID-19

Chemical carcinogenesis - DNA adducts

Nicotine addiction

Prion disease

Axon guidance

Collecting duct acid secretion

Parkinson disease

Chemical carcinogenesis - reactive oxygen species

Amyotrophic lateral sclerosis

Huntington disease

Ribosome

Oxidative phosphorylation

Protein export

Cytosolic DNA-sensing pathway

Growth hormone synthesis, secretion and action

GnRH secretion

Endocrine and other factor-regulated calcium reabsorption

Non-alcoholic fatty liver disease

Base excision repair

Phagosome

Pathways of neurodegeneration - multiple diseases

#dev.off()

The plot below gives an overview of the expression of the top KEGG pathways.

mean.decomp <- plot.decomp(t(kegg.mean.mat), label.points = TRUE)

row.order <- order(mean.decomp$v[,1])
png("~/Desktop/kegg_mean.png", width = 11, height = 7, units = "in", res = 300)
layout.mat <- matrix(c(1,2), nrow = 1)
layout(layout.mat, widths = c(1, 0.2))
par(mar = c(4,25,2,2))
imageWithText(signif(kegg.mean.mat[row.order,], 2), split.at.vals = TRUE, 
    col.scale = c("blue", "brown"), grad.dir = "ends", col.text.shift = 0.03,
    row.text.shift = 0.15)
par(mar = c(20,4,2,4)) 
imageWithTextColorbar(signif(kegg.mean.mat[row.order,], 2), split.at.vals = TRUE, 
    col.scale = c("blue", "brown"), grad.dir = "ends", cex = 1)
dev.off()
## quartz_off_screen 
##                 2
#calculate mean gene expression for a given gene list
mean.bd.k.file <- file.path(results.dir, "KEGG_BD_individual_expression.RDS")
if(!file.exists(mean.bd.k.file)){
    mean.bd.k.vals <- nested_mean_vals(nested_gene_list = kegg.bd.list, 
        expr.mat = adj_expr, "outer")
    saveRDS(mean.bd.k.vals, mean.bd.k.file)
}else{
    mean.bd.k.vals <- readRDS(mean.bd.k.file)
}
min.val <- min(unlist(mean.bd.k.vals), na.rm = TRUE)
max.val <- max(unlist(mean.bd.k.vals), na.rm = TRUE)

kegg.plot.dir <- file.path(results.dir, "KEGG_plots")

all.bd.k.result <- vector(mode = "list", length = length(mean.bd.k.vals))
names(all.bd.k.result) <- names(mean.bd.k.vals)

pdf(file.path(kegg.plot.dir, "kegg_bd_intersections_individuals.pdf"), width = 15, height = 7)
hist(unlist(mean.bd.k.vals), xlab = "Mean Value", main = "Mean Biodomain-Kegg Intersection Expression")
for(i in 1:length(mean.bd.k.vals)){
  if(length(mean.bd.k.vals[[i]]) > 0){
    all.bd.k.result[[i]] <- plot_mean_bd_k(mean.bd.k.mat = mean.bd.k.vals[[i]], 
    covar.mat = covar.table, plot.label = names(mean.bd.k.vals)[i], order.by = "top.rank")
  }
}
dev.off()
## quartz_off_screen 
##                 2
all.r2 <- Reduce("rbind", lapply(all.bd.k.result, function(x) x$row.r2))
rownames(all.r2) <- names(all.bd.k.result)
all.r2[which(is.na(all.r2))] <- 0
all.r2[which(all.r2 < 0)] <- 0
row.order <- order(apply(all.r2, 1, max))
col.order <- order(apply(all.r2, 2, max))

pdf(file.path(results.dir, "allr2.pdf"), width = 9, height = 40)
pheatmap(all.r2[row.order,col.order], cluster_rows = FALSE, cluster_cols = FALSE)
dev.off()
mean.order <- order(apply(all.r2, 2, max))
par(mar = c(4, 16, 4, 2))
boxplot(all.r2[,mean.order], las = 2, horizontal = TRUE)

top.n = 10
par(mar = c(4, 24, 4, 2))
barplot(tail(sort(all.r2[,"APP Metabolism"]), top.n), las = 2, horiz = TRUE)


bd.k.genes <- unlist(kegg.bd.list, recursive = FALSE)
#bd.k.jaccard <- jaccard.matrix(bd.k.genes)
#hist(bd.k.jaccard)

kegg.bd.mat <- matrix(0, nrow = length(bd.list), ncol = length(kegg.list))
rownames(kegg.bd.mat) <- names(bd.list)
colnames(kegg.bd.mat) <- names(kegg.list)
k.bd.test <- vector(mode = "list", length = length(kegg.list))
names(k.bd.test) <- names(kegg.list)
for(i in 1:length(names(kegg.list))){
    k.idx <- grep(names(kegg.list)[i], names(bd.k.genes))
    #names(bd.k.genes)[k.idx]
    #print(length(k.idx))
    if(length(k.idx) > 0){
        sub.list <- bd.k.genes[k.idx]
        k.bd.test[[i]] <- unlist(sub.list)
        present.bd <- sapply(strsplit(names(bd.k.genes)[k.idx], ".", fixed = TRUE), function(x) x[1])
        kegg.bd.mat[present.bd,i] <- sapply(sub.list, length)
    }else{
        k.bd.test[[i]] <- NA
    }
}

pdf("~/Desktop/intersection_counts.pdf", width = 12, height = 45)
pheatmap(t(kegg.bd.mat), display_numbers = TRUE, number_format = "%.0f")
dev.off()

bip.mat <- graph_from_biadjacency_matrix(kegg.bd.mat)
no.con <- which(degree(bip.mat) == 0)
simp.net <- delete_vertices(bip.mat, no.con)
vert.col <- rep("#7fc97f", vcount(simp.net))
vert.col[which(V(simp.net)$name %in% names(bd.list))] <- "#beaed4"

pdf("~/Desktop/adj.pdf", width = 9, height = 40)
pheatmap(sqrt(t(kegg.bd.mat)))
dev.off()


test.mat <- jaccard.matrix(k.bd.test)
pdf("~/Desktop/kegg_jaccard.pdf", width = 40, height = 40)
pheatmap(test.mat)
dev.off()

adj.mat <- test.mat
diag(adj.mat) <- 0
k.net <- graph_from_adjacency_matrix(adj.mat, mode = "undirected", weighted = TRUE)
k.deg <- colSums(test.mat)
barplot(sort(k.deg), las = 2)
pdf("~/Desktop/kegg_neg.pdf", width = 40, height = 40)
plot(k.net, vertex.size = 1)
dev.off()

barplot(sort(bd.deg), las = 2, horiz = TRUE)


mean.k.bd.vals <- nested_mean_vals(nested_gene_list = kegg.bd.list, 
    expr.mat = adj_expr, "inner")
names(mean.k.bd.vals) <- names(bd.list)

all.k.bd.result <- vector(mode = "list", length = length(mean.k.bd.vals))
names(all.k.bd.result) <- names(mean.k.bd.vals)
pdf(file.path(kegg.plot.dir, "kegg_bd_intersections_by_BD.pdf"), width = 12, height = 22)
    for(i in 1:length(bd.list)){
        all.k.bd.result[[i]] <- plot_mean_bd_k(mean.bd.k.mat = mean.k.bd.vals[[i]], 
            covar.mat = covar.table, plot.label = names(mean.k.bd.vals)[i])
    }
dev.off()

total.r2 <- sapply(all.k.bd.result, function(x) x[[3]])
max.r2 <- sapply(all.k.bd.result, function(x) if(length(x) > 0){max(x[[2]])}else{NA})

par(mar = c(4, 16, 4, 2))
barplot(sort(unlist(total.r2)), las = 2, horiz = TRUE, xlim = c(-0.05, 0.2))
barplot(sort(unlist(max.r2)), las = 2, horiz = TRUE, xlim = c(0, 0.3))

Stacked intersections

All intersections for all individuals are shown below. This matrix does tend to separate the FC and VS genotypes from each other.

sub.vals <- merge_mean_mats(mean.bd.k.vals)
#pdf("~/Desktop/all_intersections.pdf", width = 15, height = 7)
sub.val.result <- plot_mean_bd_k(mean.bd.k.mat = sub.vals, covar.table, row.names = NULL,
    order.by = "top.rank")

#dev.off()
sub.val.decomp <- sub.val.result$decomp
#barplot(cl.width, col = geno.cols)

The plot below shows that this separation is statistically significant along the first principal component of the matrix.

sub.decomp <- plot.decomp(sub.vals, plot.results = FALSE)
ind.vals <- sub.decomp$v #use the loadings on the individuals

#split loadings by genotype
ind.v <- lapply(geno.idx, function(x) ind.vals[x,1])
aov.test <- aov.by.list(ind.v)

ind.p <- aov.test$"Pr(>F)"[1]
geno.order <- order(sapply(ind.v, mean))
vioplot(ind.v[geno.order], ylab = "PC1", col = geno.cols[geno.order],
    main = paste("Genotypes are distinguishable\np =", signif(ind.p, 2)))
stripchart(ind.v[geno.order], add = TRUE, vertical = TRUE, method = "jitter", 
    col = "#dd1c77", pch = 16)
abline(h = 0)

We did permutations to verify that this p value is drawn from a uniform distribution. We shuffled the individual labels and recalculated the ANOVA. The plot below shows the qq plot of the p value distribution compared to a uniform distribution.

rnd.aov <- vector(mode = "list", length = 1000)
for(p in 1:1000){
    rnd.sample <- sample(1:ncol(sub.vals))
    rnd.geno.idx <- lapply(names(geno.cols), function(x) which(covar.table[rnd.sample,"climb_geno"] == x))
    names(rnd.geno.idx) <- names(geno.cols)
    rnd.ind.v <- lapply(rnd.geno.idx, function(x) ind.vals[x,1])
    #boxplot(rnd.ind.v)
    #aov.by.list(rnd.ind.v)
    rnd.aov[[p]] <- aov.by.list(rnd.ind.v)
}

rnd.p <- sapply(rnd.aov, function(x) x[,"Pr(>F)"][1])

qqunif.plot(rnd.p)

Pathway selection

The loadings on the pathway intersections should tell us which pathways most strongly drive the separation of genotypes. The pathway intersection first two PCs are shown below. We are most interested in the pathways that vary along PC1. There is a skew toward the negative side. These are pahways that are down in FC relative to VS.

high.thresh <- get.percentile(sub.decomp$u[,1], 99)
low.thresh <- get.percentile(sub.decomp$u[,1], 1)

hist(sub.decomp$u[,1], xlab = "PC1", main = "PC1", breaks = 100)
abline(v = c(high.thresh, low.thresh), col = "red")

#plot(sort(sub.decomp$u[,1]), ylab = "PC1")
#abline(h = c(high.thresh, low.thresh))

#plot(sort(sub.decomp$u[,1]), 1:nrow(sub.decomp$u), xlab = "PC1", ylab = "Index")
#abline(v = c(high.thresh, low.thresh))
low.idx <- which(sub.decomp$u[,1] < low.thresh)
high.idx <- which(sub.decomp$u[,1] > high.thresh)

#plot(sub.decomp$u[,1], sub.decomp$u[,2], pch = 16, 
#    xlab = paste0("PC1 (", round(sub.decomp$var.exp[1]*100), "%)"), 
#    ylab = paste0("PC2 (", round(sub.decomp$var.exp[2]*100), "%)"), xlim = c(-0.15, 0.1))
#text(sub.decomp$u[high.idx,1], sub.decomp$u[high.idx,2], labels = rownames(sub.vals)[high.idx], cex = 0.7)
#text(sub.decomp$u[low.idx,1], sub.decomp$u[low.idx,2], labels = rownames(sub.vals)[low.idx], cex = 0.7)

pt.col <- rep("black", nrow(sub.decomp$u))
pt.col[c(high.idx, low.idx)] <- "red"
plot(sub.decomp$u[,1], sub.decomp$u[,2], pch = 16, col = pt.col,
    xlab = paste0("PC1 (", round(sub.decomp$var.exp[1]*100), "%)"), 
    ylab = paste0("PC2 (", round(sub.decomp$var.exp[2]*100), "%)"), xlim = c(-0.15, 0.1))

#int.name <- "Glutathione metabolism : Proteostasis"
#pt.col <- rep("black", nrow(sub.decomp$u))
#pt.col[which(rownames(sub.vals) == int.name)] <- "red"
#plot(sub.decomp$u[,1], sub.decomp$u[,2], pch = 16, col = pt.col,
#    xlab = paste0("PC1 (", round(sub.decomp$var.exp[1]*100), "%)"), 
#    ylab = paste0("PC2 (", round(sub.decomp$var.exp[2]*100), "%)"), xlim = c(-0.15, 0.1))
#abline(v = c(high.thresh, low.thresh))

Top pathways

A clearer image of these pathways is shown below.

large.path <- sub.vals[c(high.idx,low.idx),]
plot_mean_bd_k(large.path, covar.table, label.margin = 20, order.by = "top.rank")

The figure below shows the expression means for each genotype for each of these pathway intersections.

geno.idx <- lapply(names(geno.cols), function(x) which(covar.table[,"climb_geno"] == x))
names(geno.idx) <- names(geno.cols)

geno.mats <- lapply(geno.idx, function(x) large.path[,x])
geno.means <- sapply(geno.mats, rowMeans)

row.order <- hclust(dist(geno.means))$order

#test.mat <- t(apply(geno.means, 1, scale))
par(mar = c(4,20,2,2))
imageWithText(geno.means[row.order,c(2,3,1,4,5)], show.text = FALSE, 
    split.at.vals = TRUE, col.scale = c("blue", "brown"),
    grad.dir = "ends", col.text.shift = 0.02, row.text.cex = 0.7, row.text.shift = 0.15)

#imageWithTextColorbar(geno.means, split.at.vals = TRUE, col.scale = c("blue", "brown"),
#    grad.dir = "ends", cex = 1)

The decomposition below shows very nice separation of the genotypes.

mean.decomp <- plot.decomp(t(geno.means), label.points = TRUE, 
    cols = geno.cols, cex = 1.5, xlim = c(-0.5, 0.7))

#plot(mean.decomp$v)
#test <- plot.decomp(geno.means)

Term networks

We built networks between biodomains and kegg pathways based on the pathway intersections that passed the threshold. We made one for the positive pathways (up in FC) and one for the negative pathways (down in FC).

Node size is just the degree of the node. I realize that this isn’t all that meaninful since the interactions can be a bit redundant. I’m working on developing a better system.

The edge color and width indicates the percent variance explained by genotype for the genes in that interaction.

build_net <- function(edge.names){

    split.names <- strsplit(edge.names, " : ")
    net.kegg <- sapply(split.names, function(x) x[1])
    net.bd <- sapply(split.names, function(x) x[2])

    net <- graph_from_edgelist(cbind(net.bd, net.kegg), directed = FALSE)
    v.col <- rep("#7fc97f", vcount(net))
    v.col[which(V(net)$name %in% names(bd.list))] <- "#beaed4"
    
    V(net)$vertex.color <- v.col
    return(net)
}    


#pdf("~/Desktop/net.pdf", width = 15, height = 15)

high.net <- build_net(edge.names = rownames(sub.vals)[high.idx])
high.deg <- degree(high.net)
high.r2 <- sub.val.result$row.r2[high.idx]
ecol <- colors.from.values(high.r2, col.scale = "blue", grad.dir = "high")
plot(high.net, vertex.color = V(high.net)$vertex.color, 
    vertex.size = scale.between.vals(high.deg*3, 5, 40),
    layout = layout_nicely, main = "Positive Pathways", edge.width = high.r2*20,
    edge.color = ecol, vertex.label.cex = scale.between.vals(high.deg/2, 1, 3))

low.net <- build_net(edge.names = rownames(sub.vals)[low.idx])
low.deg <- degree(low.net)
low.r2 <- sub.val.result$row.r2[low.idx]
ecol <- colors.from.values(low.r2, col.scale = "blue", grad.dir = "high")
plot(low.net, vertex.color = V(low.net)$vertex.color, 
    vertex.size = scale.between.vals(low.deg*3, 5, 40),
    layout = layout_nicely, main = "Negative Pathways", edge.width = low.r2*20,
    edge.color = ecol, vertex.label.cex = scale.between.vals(low.deg/2, 1, 3))

#dev.off()

Individual genes in intersections

We looked more closely at individual intersections to get a better look at how the genes in there are expressed across the genotypes.

plot_individual_genes <- function(intersection.name = NULL, gene.list = NULL, 
    plot.type = c("individual", "means", "boxes", "none"), plot.label = "",
    label.margin = 4, order.by = c("PC", "top.rank"), show.text = TRUE,
    legend.x = NULL, legend.y = NULL){

    order.by = order.by[1]
    plot.type = plot.type[1]
    if(is.null(intersection.name) && is.null(gene.list)){stop("At least one of intersection.name or gene.list must be specified")}

    if(is.null(gene.list)){
        split.name <- strsplit(intersection.name, " : ")
        k.name <- split.name[[1]][1]
        bd.name <- split.name[[1]][2]
        bd.idx <- which(names(kegg.bd.list) == bd.name)
        k.idx <- which(names(kegg.bd.list[[bd.idx]]) == k.name)
        term.genes <- kegg.bd.list[[bd.idx]][[k.idx]]
        plot.label <- intersection.name
    }else{
        term.genes <- gene.list
    }
    common.genes <- intersect(term.genes, colnames(adj_expr))

    if(length(common.genes) == 0){
        plot.text(paste("Cannot find genes for", intersection.name))
    }

    term.expr <- adj_expr[,common.genes,drop=FALSE]
    gene.names <- mouse.genes[match(common.genes, mouse.genes[,"ensembl_gene_id"]), "external_gene_name"]
    colnames(term.expr) <- gene.names

    gene.means <- sapply(geno.idx, function(x) term.expr[x,,drop=FALSE])

    if(ncol(gene.means[[1]]) > 1){    
        mean.mat <- sapply(gene.means, colMeans)
        #plot.decomp(mean.mat, label.points = TRUE)
        gene.decomp <- plot.decomp(mean.mat, plot.results = FALSE)
        gene.order <- order(gene.decomp$u[,1])
    }else{
        mean.mat <- sapply(gene.means, colMeans)
        mean.mat <- matrix(mean.mat, nrow = 1)
        gene.order = 1
    }
    
    
    if(plot.type == "individual"){
        plot_mean_bd_k(mean.bd.k.mat = t(term.expr[,gene.order,drop=FALSE]), 
            covar.mat = covar.table, plot.label = plot.label, 
            label.margin = label.margin, order.by = order.by)
    }

    if(plot.type == "means"){
        imageWithText(mean.mat[gene.order,,drop=FALSE], show.text = show.text, split.at.vals = TRUE, col.scale = c("blue", "brown"),
            grad.dir = "ends", row.text.shift = 0.15, col.text.shift = 0.08, col.text.rotation = 0,
            col.text.adj = 0.5, main = plot.label)
    }

    if(plot.type == "boxes"){
        plot.grouped.boxes(lapply(gene.means, function(x) x[,gene.order,drop=FALSE]), 
            type = "matrix", legend.x = legend.x, legend.y = legend.y,
            group.cols = geno.cols, print.vals = NA, main = plot.label)
        plot.dim <- par("usr")
        segments(x0 = 0, x1 = plot.dim[2], y0 = 0)
    }

    invisible(common.genes)
}

expr_loadings <- function(term.name, gene.list, expr.mat, group.list){
    
    term.idx <- which(names(gene.list) == term.name)
    if(length(term.idx) == 0){stop("Can't find ", term.name)}
    term.genes <- gene.list[[term.idx]]
    common.genes <- intersect(term.genes, rownames(expr.mat))

    if(length(common.genes) == 0){
        plot.text(paste("Cannot find genes for ", term.name))
    }

    term.expr <- expr.mat[common.genes,]
    gene.names <- mouse.genes[match(common.genes, mouse.genes[,"ensembl_gene_id"]), "entrezgene_id"]
    rownames(term.expr) <- gene.names
    group.means <- lapply(group.list, function(x) rowMeans(term.expr[,x]))
    mean.diff <- group.means[[2]] - group.means[[1]]
    #hist(mean.diff)
    #gene.decomp <- plot.decomp(t(term.expr), label.points = TRUE)
    #gene.decomp <- plot.decomp(term.expr, plot.results = FALSE)
    #gene.v <- gene.decomp$v[,1]
    #names(gene.v)  <- gene.names
    #return(gene.v)  
    return(mean.diff)
}
#term.name <- rownames(sub.vals)[high.idx[1]]
#term.name <- "Longevity regulating pathway : Endolysosome"
#term.name <- "Alzheimer disease : Mitochondrial Metabolism"
term.name <- "ECM-receptor interaction : Synapse"

intersection.dir <- file.path(results.dir, "Intersection_Plots")
if(!file.exists(intersection.dir)){dir.create(intersection.dir)}


#Paths with high values on PC1
pdf(file.path(intersection.dir, "High_Paths_Individuals.pdf"), width = 10, height = 5)
for(i in 1:length(high.idx)){
    plot_individual_genes(intersection.name = rownames(sub.vals)[high.idx[i]], 
        plot.type = "individual")
}
dev.off()
## quartz_off_screen 
##                 2
pdf(file.path(intersection.dir, "High_Paths_Means.pdf"), width = 5, height = 7)
for(i in 1:length(high.idx)){
    plot_individual_genes(intersection.name = rownames(sub.vals)[high.idx[i]], plot.type = "means")
}
dev.off()
## quartz_off_screen 
##                 2
pdf(file.path(intersection.dir, "High_Paths_Boxes.pdf"), width = 20, height = 5)
for(i in 1:length(high.idx)){
    plot_individual_genes(intersection.nam = rownames(sub.vals)[high.idx[i]], plot.type = "boxes")
}
dev.off()
## quartz_off_screen 
##                 2
#Paths with low values on PC1
pdf(file.path(intersection.dir, "Low_Paths_Individuals.pdf"), width = 10, height = 5)
for(i in 1:length(low.idx)){
    plot_individual_genes(intersection.nam = rownames(sub.vals)[low.idx[i]], plot.type = "individual")
}
dev.off()
## quartz_off_screen 
##                 2
pdf(file.path(intersection.dir, "Low_Paths_Means.pdf"), width = 5, height = 9)
    for(i in 1:length(low.idx)){
        plot_individual_genes(intersection.nam = rownames(sub.vals)[low.idx[i]], plot.type = "means")
    }
dev.off()
## quartz_off_screen 
##                 2
pdf(file.path(intersection.dir, "Low_Paths_Boxes.pdf"), width = 20, height = 5)
for(i in 1:length(low.idx)){
    plot_individual_genes(intersection.nam = rownames(sub.vals)[low.idx[i]], plot.type = "boxes")
}
dev.off()
## quartz_off_screen 
##                 2
#get all top paths that have a given term, and look at 
#expression of those genes

#this function looks at the top pathways (either positive or negative)
#It collect all the genes that fall under a single term across multiple
#interactions, for example, all KEGG-Biodomain interactions that involve
#Synapse. It plots the gene level statistics for all the genes, as well
#as the enrichment for those genes. This might help untangle synaptic 
#genes that are upregulated in VS vs. those that are downregulated, for
#example.

#Positive and negative are sort of arbitraty because they depend
#on the sign of the PC, but you can tell from the plots whether 
#VS is up or down

plot_merged_list <- function(search.term, path.type = c("Positive", "Negative")){

    if(path.type == "Positive"){
        app.idx <- grep(search.term, rownames(sub.vals)[high.idx]); name.list <- rownames(sub.vals)[high.idx]
    }else{
        app.idx <- grep(search.term, rownames(sub.vals)[low.idx]); name.list <- rownames(sub.vals)[low.idx]
    }

    if(length(app.idx) == 0){
        stop(paste("I couldn't find", search.term))
    }

    app.int <- name.list[app.idx]
    print(app.int)

    app.list <- lapply(app.int, function(x) plot_individual_genes(x, plot.type = "none"))
    app.genes <- Reduce("union", app.list)
    app.result <- plot_individual_genes(gene.list = app.genes, 
        plot.label = paste(search.term, "genes in", path.type, "Pathways"))

    app.enrich <- gost(app.genes, organism = "mmusculus", 
        sources = c("GO", "KEGG", "REACTOME", "CORUM", "HP"))

    quartz()
    plot.enrichment(app.enrich, num.terms = 30, 
        plot.label = paste("Enrichment of", search.term, "genes in", path.type, "Pathways"))
}

plot_merged_list("Synapse", "Positive"); #cell adhesion
plot_merged_list("Synapse", "Negative"); #ribosome, translation at synapse


test.genes <- plot_individual_genes(intersection.nam = "Ras signaling pathway : APP Metabolism")
enrich <- gost(test.genes, organism = "mmusculus", sources = c("GO", "KEGG", "REACTOME", "CORM", "HP"))

par(mar = c(4,20, 2,2))
plot.enrichment(enrich, num.terms = 30, max.char = 100)
#cat(test.genes, sep = "\n")

I think a lot of these terms have similar genes in them. The code blow generates Jaccard index matrices and plots them to the Intersection Plot folder. The names are too long to plot in the html.

Interestingly, “Ribosome : Proteostasis” and “Ribosome : Structural stabilization” are basically the same set of genes, but other than that, the lists are fairly independent. There is a lot of overlap among the mitochondrial gene pathway intersections and the ribosome pathway intersections, but not perfect overlap.

There is not a lot of overlap among the ox-phos pathway intersections. The “Retrograde endocannabanoid signaling : Mitochondrial metabolism” intersection is also fairly distinct from the other Mitochondrial metabolism pathway intersections.

high.genes <- vector(mode = "list", length = length(high.idx))
names(high.genes) <- rownames(sub.vals)[high.idx]
for(i in 1:length(high.idx)){
    high.genes[[i]] <- plot_individual_genes(intersection.name = rownames(sub.vals)[high.idx[i]], plot.type = "none")
}

high.gene.jaccard <- jaccard.matrix(high.genes)

pdf(file.path(intersection.dir, "High_Paths_Jaccard.pdf"), width = 10, height = 10)
pheatmap(high.gene.jaccard)

dev.off()
## pdf 
##   3
low.genes <- vector(mode = "list", length = length(low.idx))
names(low.genes) <- rownames(sub.vals)[low.idx]
for(i in 1:length(low.idx)){
    low.genes[[i]] <- plot_individual_genes(intersection.nam = rownames(sub.vals)[low.idx[i]], plot.type = "none")
}
low.gene.jaccard <- jaccard.matrix(low.genes)

pdf(file.path(intersection.dir, "Low_Paths_Jaccard.pdf"), width = 12, height = 12)
pheatmap(low.gene.jaccard)
dev.off()
## pdf 
##   3

Term Connections

In the networks, biodomains are linked through KEGG pathways. The biodomains were designed to be as non-overlapping as possible. However, we know that they are correlated both through gene expression correlations and through interacting biology. The connections to KEGG pathways might give us a sense about how different aspects of Alzheimer’s disease are related to each other. By identifying genes that are differentially expressed across the genotypes and link biodomains together, we might be able to identify shared biology across the domains and identify causal factors that affect multiple pieces of the pie.

Below we investigate genes that link biodomains through KEGG pathways.

bd.names <- c("Synapse", "Apoptosis", "Vasculature"); kegg.names <- c("ECM-receptor interaction")
#bd.names <- c("Immune Response", "Synapse"); kegg.names <- c("Ribosome")
#bd.names <- c("Lipid Metabolism"); kegg.names <- c("Bacterial invasion of epithelial cells", "Adherens junction")

term.pairs <- bipartite_pairs(bd.names, kegg.names, "Biodomains", "KEGG")

pair.genes <- vector(mode = "list", length = nrow(term.pairs))
names(pair.genes) <- apply(term.pairs, 1, function(x) paste(x, collapse = " : "))
for(i in 1:nrow(term.pairs)){
    bd.idx <- which(names(kegg.bd.list) == term.pairs[i,"Biodomains"])
    k.idx <- which(names(kegg.bd.list[[bd.idx]]) == term.pairs[i,"KEGG"])
    pair.genes[[i]] <- kegg.bd.list[[bd.idx]][[k.idx]]
}


plotVenn(pair.genes)
## Loading required package: VennDiagram
## Loading required package: futile.logger

The plot below shows the stats for the genes in the three-way intersection.

connector.genes <- Reduce("intersect", pair.genes)
#cat(connector.genes, sep = "\n")

#mousemine GO network
#put the connector genes into mouse mine, filter the GO term table
#to relevant GO terms, and download
#plot the genes and GO terms as a network
#It will probably be messy, but it is a way to see
#which genes in a list are involved in relevant functions
#for example ribosomal genes that are at the synapse and involved
#in immune function.
#mouse.table <- read.delim("~/Downloads/MFeature_GO_test.tsv", header = FALSE)
#pdf("~/Desktop/go_net.pdf", width = 20, height = 20)
#mousemine_network(mouse.table, gene.col = 2, min.cex = 2, min.v.size = 3, max.v.size = 10)
#dev.off()


#png(file.path(results.dir, "connector.png"), width = 10, height = 5, units = "in", res = 300)
    plot_individual_genes(gene.list = connector.genes, 
        plot.type = "individual", plot.label = "", label.margin = 4, order.by = "PC")

#dev.off()

#connector.enrich <- gost(connector.genes, organism = "mmusculus", sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
#plot.enrichment(connector.enrich, num.terms = 30, max.term.size = 3000)
#cat(connector.genes, sep = "\n")

#plot_individual_genes(gene.list = connector.genes, plot.type = "boxes", plot.label = "", legend.x = 36, legend.y = 4)
#plot_individual_genes(gene.list = connector.genes, plot.type = "means", plot.label = "")

#gene_vioplot("Itga4")
#gene_vioplot("Itgav")
#gene_vioplot("Itgb1")
#gene_vioplot("Itgb3")
#gene_vioplot("Fn1")
#gene_vioplot("Rpl13a")

#kl.targets <- c("Fgf23", "Fgfr1", "Fgfr2", "Fgfr3", "Fgfr4", "Klb", "Lctl", "Trpv5")
#par(mfrow = c(2,4))
#for(i in 1:length(kl.targets)){
#    gene_vioplot(kl.targets[i])
#}
library(pathview)

kegg.file <- here("Data", "general", "KEGG.Mouse.RDS")
if(!file.exists(kegg.file)){
    all.kegg <- download_KEGG("mmu", "KEGG", "kegg")
    saveRDS(all.kegg, kegg.file)
}else{
    all.kegg <- readRDS(kegg.file)
}

u_path <- gsub(" - Mus musculus (house mouse)", "", all.kegg[[2]][,"to"], fixed = TRUE)

# build GSEA list
path.id <- all.kegg[[2]][,1]
path.idx <- lapply(path.id, function(x) which(all.kegg[[1]][,1] == unlist(x)[1]))
path.gene.id <- lapply(path.idx, function(x) all.kegg[[1]][x,2])
names(path.gene.id) <- u_path

path.gene.ensembl <- lapply(path.gene.id, 
    function(x) mouse.genes[match(x, mouse.genes[,"entrezgene_id"]), "ensembl_gene_id"])


comp_groups <- list("FC" = grep("FC", covar.table[,"climb_geno"]), "VS" = grep("VS", covar.table[,"climb_geno"]))

#int.name <- rownames(sub.vals)[high.idx[1]]
#int.name <- "Longevity regulating pathway : Endolysosome"
#int.name <- "Alzheimer disease : Mitochondrial Metabolism"

kegg.name <- "Phagosome"
kegg.name <- "Cell adhesion molecules"
kegg.name <- "Longevity regulating pathway"
kegg.name <- "Ribosome"
kegg.name <- "Oxidative phosphorylation"
kegg.name <- "Proteasome"
kegg.name <- "ECM-receptor interaction"
kegg.name <- "Glutamatergic synapse"

expr.vals <- expr_loadings(kegg.name, kegg.list, t(adj_expr), comp_groups)
#hist(expr.vals)
scaled.vals <- scale.between.vals(expr.vals, -1, 1)
#hist(scaled.vals)
#plot(expr.vals, scaled.vals);abline(h = 0, v = 0)
path.num <- path.id[which(u_path == kegg.name)]
pv.out <- pathview(gene.data = scaled.vals, 
    pathway.id = path.num, species = "mmu", 
    out.suffix = kegg.name,
    kegg.dir = intersection.dir)
for(k in 1:length(kegg.list)){
    kegg.vals <- expr_loadings(names(kegg.list)[i], kegg.list, t(adj_expr), comp_groups)   
    boxplot(kegg.vals)
}


all.kegg.diff <- lapply(names(kegg.list), function(x) expr_loadings(x, kegg.list, t(adj_expr), comp_groups))

diff.mean <- sapply(all.kegg.diff, mean)
boxplot(all.kegg.diff[order(diff.mean)])
abline(h = 0)

names(kegg.list)[head(order(diff.mean))]
names(kegg.list)[tail(order(diff.mean))]

common.kegg <- lapply(kegg.list, function(x) intersect(x, colnames(adj_expr)))
kegg.means <- sapply(common.kegg, function(x) rowMeans(adj_expr[,x]))
plot_mean_bd_k(t(kegg.means), covar.table)

kegg.decomp <- plot.decomp(t(kegg.means))
kegg.vals <- kegg.decomp$u[,1]
low.thresh <- get.percentile(kegg.vals, 1)
high.thresh <- get.percentile(kegg.vals, 99)

high.kegg <- which(kegg.vals >= high.thresh)
low.kegg <- which(kegg.vals <= low.thresh)

#names(kegg.list)[high.kegg]
#names(kegg.list)[low.kegg]
extreme.kegg <- names(kegg.list)[c(high.kegg, low.kegg)]
for(k in 1:length(extreme.kegg)){
    expr.vals <- expr_loadings(extreme.kegg[k], kegg.list, t(adj_expr), comp_groups)
    #print(length(expr.vals))
    if(length(expr.vals) > 10){
        #hist(expr.vals)
        scaled.vals <- scale.between.vals(expr.vals, -1, 1)
        #hist(scaled.vals)
        #plot(expr.vals, scaled.vals);abline(h = 0, v = 0)
        path.num <- path.id[which(u_path == extreme.kegg[k])]
        pv.out <- try(pathview(gene.data = scaled.vals, 
            pathway.id = path.num, species = "mmu", 
            out.suffix = extreme.kegg[k],
            kegg.dir = intersection.dir))
    }
}
#here we plot violin plots of specific terms we are interested in.

pdf("~/Desktop/mapk.pdf", width = 7, height = 5)
term_vioplot(outer.term = "MAPK signaling pathway", inner.term = "Apoptosis", 
    mean.term.vals = mean.bd.k.vals)

term_vioplot(outer.term = "MAPK signaling pathway", inner.term = "DNA Repair", 
    mean.term.vals = mean.bd.k.vals)
dev.off()


term_vioplot(outer.term = "Type II diabetes mellitus", mean.term.vals = mean.k.vals)
term_vioplot(outer.term = "Proteasome", mean.term.vals = mean.k.vals)
#This block look for gene sets and plots their expression
#and decomposition by individual

#search.term <- c("Itga", "Itgb")
#search.term <- c("Fgf")
#search.term = "Grin"
search.term = "Mapk"

plot.genes <- unlist(lapply(search.term, function(x) mouse.genes[grep(x, mouse.genes[,"external_gene_name"]),"external_gene_name"]))
length(plot.genes)

gene.id <- mouse.genes[which(mouse.genes[,"external_gene_name"] %in% plot.genes),"ensembl_gene_id"]
common.genes <- intersect(colnames(adj_expr), gene.id)
names(common.genes) <- mouse.genes[match(common.genes, mouse.genes[,"ensembl_gene_id"]),"external_gene_name"]
gene.expr <- lapply(geno.idx, function(x) adj_expr[x,common.genes])
plot_individual_genes(gene.list = common.genes, order.by = "top.rank")

plot_individual_genes(gene.list = common.genes["Grin2b"], order.by = "top.rank")

for(i in 1:length(gene.expr)){
    colnames(gene.expr[[i]]) <- names(common.genes)
}
plot.grouped.boxes(gene.expr, type = "matrix", group.cols = geno.cols, main = search.term,
    print.vals = NA)
plot.dim <- par("usr")
segments(x0 = plot.dim[1], x1 = plot.dim[2], y0 = 0)
#This block looks at correlations between genes in 
#a given intersection
int.genes <- plot_individual_genes(intersection.name = "ECM-receptor interaction : Synapse")
int.names <- mouse.genes[match(int.genes, mouse.genes[,"ensembl_gene_id"]),"external_gene_name"]
int.expr <- adj_expr[,int.genes]
colnames(int.expr) <- int.names
imageWithText(signif(cor(int.expr), 2))


term.name = "Ribosome"
term.idx <- which(names(kegg.list) == term.name)
plot_individual_genes(gene.list = kegg.list[[term.idx]], 
        plot.type = "individual", plot.label = "", label.margin = 4)


kegg.vals <- non_nested_mean_vals(kegg.list, adj_expr)
kegg.result <- plot_mean_bd_k(kegg.vals, covar.mat = covar.table, plot.label = "KEGG")
kegg.r2 <- kegg.result$row.r2

par(mar = c(4,22,2,2))
barplot(tail(sort(kegg.r2), 20), las = 2, horiz = TRUE)
#this block looks at enrichments of particular interactions
#in the KEGG-Biodomain network

term.name <- "ECM-receptor interaction : Synapse"
term.name <- "Cell adhesion molecules : Synapse"
term.name <- "Type II diabetes mellitus : Synapse"
term.name <- "Parkinson disease : DNA Repair"
term.name <- "Ras signaling pathway : APP Metabolism"
int.genes <- plot_individual_genes(intersection.name = term.name)
int.enrich <- gost(int.genes, organism = "mmusculus", sources = c("GO", "KEGG", "REACTOME", "HP", "CORUM"))
plot.enrichment(int.enrich, max.term.size = 3000, num.terms = 30, plot.label = term.name)
#checking results against
#Morphological and biochemical signs of age-related neurodegenerative changes in klotho mutant mice

test.genes <- c("Nefh", "Nefm", "Nefl", "Ina", "Prph", "Nes") #neurofilament genes
test.genes <- c("Map2") #increased in Klotho-deficient mice
test.genes <- "Ctsd" #lysosomal gene
test.genes <- c("Map1a", "Map1b")
test.genes <- "Bcl2l1" #antiapoptotic
test.genes <- "Bax" #proapoptotic
test.genes <- "Mapk1"
test.genes <- c("Gfap", "Vim", "Synm", "Nes") #astrocyte microfilament genes
test.genes <- c("Apoe", "Map3k12", "Mapk27", "Mapk1", "Fos", "App", "Ldlr")
test.genes <- c("Apoe", "App", "Kl")
test.id <- mouse.genes[match(test.genes, mouse.genes[,"external_gene_name"]), "ensembl_gene_id"]

plot_individual_genes(gene.list = test.id, order.by = "top.rank",
        plot.type = "individual", plot.label = "", label.margin = 4)


plot_individual_genes(gene.list = test.id,
        plot.type = "boxes", plot.label = "", label.margin = 4)

common.genes <- intersect(test.id, colnames(adj_expr))
common.names <- mouse.genes[match(common.genes, mouse.genes[,"ensembl_gene_id"]), "external_gene_name"]
expr.mat <- adj_expr[,common.genes]
colnames(expr.mat) <- common.names
pheatmap(cor(expr.mat), display_numbers = TRUE)

gene1 <- "Apoe"; gene2 <- "App"

pt.col <- rep(NA, nrow(covar.table))
for(i in 1:length(geno.cols)){
    pt.col[which(covar.table[,"climb_geno"] == names(geno.cols)[i])] <- geno.cols[i]
}
plot.with.model(expr.mat[,gene1], expr.mat[,gene2], xlab = gene1, ylab = gene2, col = pt.col)

xlim = c(min(expr.mat[,gene1]), max(expr.mat[,gene1]))
ylim = c(min(expr.mat[,gene2]), max(expr.mat[,gene2]))
pdf("~/Desktop/gene_comparison.pdf", width = 5, height = 8)
layout.mat <- matrix(c(6,1,2,3,4,5), ncol = 2, byrow = TRUE)
layout(layout.mat)
for(i in 1:length(geno.idx)){
    plot.with.model(expr.mat[geno.idx[[i]],gene1], expr.mat[geno.idx[[i]],gene2], 
    xlab = gene1, ylab = gene2, col = pt.col[geno.idx[[i]]], xlim = xlim, ylim = ylim,
    main = names(geno.idx)[i], cex = 1.5, cex.lab = 1.5)
}
#mtext(paste(gene1, "and", gene2), side = 3, outer = TRUE, line = -1.5, font = 2)
plot.with.model(expr.mat[,gene1], expr.mat[,gene2], xlab = gene1, ylab = gene2, 
    col = pt.col, cex = 1.5, cex.lab = 1.5, main = "All")
dev.off()
#I checked WGCNA. Unfortunately, it didn't really yield much.

library(WGCNA)

enableWGCNAThreads()

powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(adj_expr, powerVector = powers, verbose = 5)
use.power <- sft$powerEstimate
if(is.na(use.power)){use.power = 6}
use.power

#signed if no negative values
#unsigned if negative values are included
#type = "unsigned"
type = "signed"
net = blockwiseModules(adj_expr, power = use.power, 
    TOMType = type, minModuleSize = 10, 
    reassignThreshold = 0, mergeCutHeight = 0.25, 
    numericLabels = TRUE, pamRespectsDendro = FALSE, 
    saveTOMs = FALSE,verbose = 3)

mergedColors = labels2colors(net$colors)
modules <- net$colors
num.wgcna.modules <- length(unique(modules))

u_mods <- sort(unique(modules))
mod.size <- sapply(u_mods, function(x) length(which(modules == x)))
barplot(mod.size)


enrich.file <- file.path(results.dir, paste0("WGCNA_Module_Enrichment_", type, ".RDS"))
if(!file.exists(enrich.file)){
    mod.enrich <- lapply(u_mods, function(x) gost(names(which(modules == x)), 
        organism = "mmusculus", sources = c("GO", "KEGG", "REACTOME", "CORUM", "HP")))
    saveRDS(mod.enrich, enrich.file)
}else{
    mod.enrich <- readRDS(enrich.file)
}

pdf("~/Desktop/enrichment.pdf", width = 9, height = 40)
plot.enrichment.group(mod.enrich, max.term.size = 3000, transformation = "sqrt")
dev.off()

wgcna.list <- lapply(u_mods, function(x) names(which(modules == x)))
gene.r2 <- vector(mode = "list", length = length(wgcna.list))
mod.r2 <- rep(NA, length(wgcna.list))

for(i in 1:length(u_mods)){
    print(i)
    ind.genes <- lapply(wgcna.list[[i]], function(x) x)
    wgcna.vals <- non_nested_mean_vals(ind.genes, adj_expr)
    rownames(wgcna.vals) <- unlist(ind.genes)
    wgcna.result <- plot_mean_bd_k(wgcna.vals, covar.mat = covar.table, plot.results = FALSE)
    gene.r2[[i]] <- wgcna.result$row.r2
    mod.r2[i] <- wgcna.result$overall.r2
}

barplot(sort(mod.r2))

mod.order <- order(mod.r2, decreasing = TRUE)
i = 1
ind.result <- plot_individual_genes(gene.list = names(which(modules == u_mods[mod.order[i]])))

quartz()
plot.enrichment(mod.enrich[[u_mods[high.pred[i]]]], max.term.size = 3000)


hist(unlist(gene.r2))

sorted.genes <- sort(unlist(gene.r2), decreasing = TRUE)
plot(sorted.genes)


#This gives basically the same results to other analyses.
library(fgsea)
#gsea.enrich <- fgsea::fgsea(kegg.list, sorted.genes, scoreType = "pos")
#gsea.enrich <- fgsea::fgsea(bd.list, sorted.genes, scoreType = "pos")
gsea.enrich <- fgsea::fgsea(unlist(kegg.bd.list, recursive = FALSE), sorted.genes, scoreType = "pos")


norm.es <- as.numeric(as.matrix(gsea.enrich[,"NES"]))
names(norm.es) <- gsea.enrich$pathway

par(mar = c(4,24, 2,2))
barplot(tail(sort(norm.es), 19), las = 2, horiz = TRUE)